Loading libraries

library(DESeq2)
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 4.1.3
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
library(tximport)
library(affy)
library(ggplot2)
library(vsn)
library(ggrepel)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(FactoMineR)
library(genefilter)
## 
## Attaching package: 'genefilter'
## The following objects are masked from 'package:MatrixGenerics':
## 
##     rowSds, rowVars
## The following objects are masked from 'package:matrixStats':
## 
##     rowSds, rowVars
library(reshape2)
library(RColorBrewer)
library(pheatmap)
library(EnhancedVolcano)
## Registered S3 methods overwritten by 'ggalt':
##   method                  from   
##   grid.draw.absoluteGrob  ggplot2
##   grobHeight.absoluteGrob ggplot2
##   grobWidth.absoluteGrob  ggplot2
##   grobX.absoluteGrob      ggplot2
##   grobY.absoluteGrob      ggplot2
library(viridis)
## Loading required package: viridisLite
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following object is masked from 'package:matrixStats':
## 
##     count
## The following objects are masked from 'package:GenomicRanges':
## 
##     intersect, setdiff, union
## The following object is masked from 'package:GenomeInfoDb':
## 
##     intersect
## The following objects are masked from 'package:IRanges':
## 
##     collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
## 
##     first, intersect, rename, setdiff, setequal, union
## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tracktables)
library(GenomicFeatures)
## Loading required package: AnnotationDbi
## 
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
## 
##     select
library(ChIPseeker)
## 
## Registered S3 method overwritten by 'ggtree':
##   method      from 
##   identify.gg ggfun
## ChIPseeker v1.30.3  For help: https://guangchuangyu.github.io/software/ChIPseeker
## 
## If you use ChIPseeker in published research, please cite:
## Guangchuang Yu, Li-Gen Wang, Qing-Yu He. ChIPseeker: an R/Bioconductor package for ChIP peak annotation, comparison and visualization. Bioinformatics 2015, 31(14):2382-2383
library(ggbio)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## Need specific help about ggbio? try mailing 
##  the maintainer or visit https://lawremi.github.io/ggbio/
## 
## Attaching package: 'ggbio'
## The following objects are masked from 'package:ggplot2':
## 
##     geom_bar, geom_rect, geom_segment, ggsave, stat_bin, stat_identity,
##     xlim
library(readxl)
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following object is masked from 'package:ChIPseeker':
## 
##     .
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following object is masked from 'package:matrixStats':
## 
##     count
## The following object is masked from 'package:IRanges':
## 
##     desc
## The following object is masked from 'package:S4Vectors':
## 
##     rename
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.10.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite:
## Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##   genomic data. Bioinformatics 2016.
## 
## The new InteractiveComplexHeatmap package can directly export static 
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
## ! pheatmap() has been masked by ComplexHeatmap::pheatmap(). Most of the arguments
##    in the original pheatmap() are identically supported in the new function. You 
##    can still use the original function by explicitly calling pheatmap::pheatmap().
## 
## Attaching package: 'ComplexHeatmap'
## The following object is masked from 'package:pheatmap':
## 
##     pheatmap
## The following object is masked from 'package:genefilter':
## 
##     dist2
library(clusterProfiler)
## clusterProfiler v4.2.2  For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
## 
## If you use clusterProfiler in published research, please cite:
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141
## 
## Attaching package: 'clusterProfiler'
## The following objects are masked from 'package:plyr':
## 
##     arrange, mutate, rename, summarise
## The following object is masked from 'package:AnnotationDbi':
## 
##     select
## The following object is masked from 'package:IRanges':
## 
##     slice
## The following object is masked from 'package:S4Vectors':
## 
##     rename
## The following object is masked from 'package:stats':
## 
##     filter
library(enrichplot)
library(mgsa)
## Loading required package: gplots
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:IRanges':
## 
##     space
## The following object is masked from 'package:S4Vectors':
## 
##     space
## The following object is masked from 'package:stats':
## 
##     lowess
colors <- viridis(5)[c(1, 5)]

Loading count table

cnts <- readRDS("/Volumes/GoogleDrive/My Drive/Greally_Lab/Toxoplasma/Toxoplasma/ATAC-Seq/Quantification/Infected_optimal_peaks.counts.rds")

sampleTable <- data.frame(condition = factor(c(rep("Infected",
    3), rep("Uninfected", 3))), row.names = colnames(cnts))

dds <- DESeqDataSetFromMatrix(countData = assay(cnts), colData = sampleTable,
    design = ~condition, rowRanges = rowRanges(cnts))

Count distribution

  • not a lot of low counts, filtering seems to be uneccessary here
# total counts
barplot(colSums(counts(dds)), horiz = TRUE, col = colors[colData(dds)$condition],
    las = 1, xlab = "Counts", main = "Total Counts")
legend("topright", legend = levels(colData(dds)$condition), lwd = 1,
    col = colors)

# Unique regions
barplot(colSums(counts(dds) > 0), horiz = TRUE, col = colors[colData(dds)$condition],
    las = 1, xlab = "Unique Regions", main = "Regions detected")
legend("topright", legend = levels(colData(dds)$condition), lwd = 1,
    col = colors)

# plotting an unnormalized density plot of the log
# transformed counts
plotDensity(log2(counts(dds) + 1), lty = 1, col = colors[colData(dds)$condition],
    lwd = 1, xlab = "log2(Counts + 1)", main = "Raw Counts")
legend("topright", legend = levels(colData(dds)$condition), lwd = 1,
    col = colors)

# plotting an unnormalized box plot of the log transformed
# counts
boxplot(log2(counts(dds) + 1), col = colors[colData(dds)$condition],
    cex.axis = 0.5, las = 1, horizontal = TRUE, xlab = "log2(Counts + 1)",
    ylab = "Samples", main = "Raw Counts")
legend("topright", legend = levels(colData(dds)$condition), lwd = 1,
    col = colors)

Remove uninfected since there are no counts

sampleTable <- data.frame(condition = factor(c(rep("Infected",
    3))), row.names = colnames(cnts)[1:3])

dds <- DESeqDataSetFromMatrix(countData = assay(cnts)[, 1:3],
    colData = sampleTable, design = ~1, rowRanges = rowRanges(cnts)[,
        1:3])

Running DESeq

# Run DESeq
dds <- DESeq(dds)
## Warning in DESeq(dds): the design is ~ 1 (just an intercept). is this intended?
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing

Count distribution after normalization

# total reads
barplot(colSums(counts(dds)), horiz = TRUE, col = colors[colData(dds)$condition],
    las = 1, xlab = "Fragments", ylab = "Samples", main = "Total Fragments")

# density plot normalized
plotDensity(log2(counts(dds, normalized = TRUE) + 1), lty = 1,
    col = colors[colData(dds)$condition], lwd = 1, xlab = "log2(Counts + 1)",
    main = "Normalized Counts")
legend("topright", legend = levels(colData(dds)$condition), lwd = 1,
    col = colors)

# box plot normalized
boxplot(log2(counts(dds, normalized = TRUE) + 1), col = colors[colData(dds)$condition],
    cex.axis = 0.5, las = 1, horizontal = TRUE, xlab = "log2(Counts + 1)",
    ylab = "Samples", main = "Normalized Counts")
legend("topright", legend = levels(colData(dds)$condition), lwd = 1,
    col = colors)

Mean Variance Relationship

## Computing mean and variance
norm.counts <- counts(dds, normalized = TRUE)
mean.counts <- rowMeans(norm.counts)
variance.counts <- apply(norm.counts, MARGIN = 1, var)

## Mean and variance relationship
mean.var.col <- densCols(x = log2(mean.counts), y = log2(variance.counts))
plot(x = log2(mean.counts), y = log2(variance.counts), pch = 16,
    cex = 0.5, col = mean.var.col, main = "Mean-variance relationship",
    xlab = "Mean log2(normalized counts) per gene", ylab = "Variance of log2(normalized counts)",
    panel.first = grid())
abline(a = 1, b = 1, col = "red")  # a linear line to compare against

##Estimation of Dispersion

plotDispEsts(dds)

sum(mcols(dds, use.names = TRUE)[, "dispOutlier"])
## [1] 27

Transform counts to stabalize mean-variance for clustering

dds_rlog <- rlog(dds, blind = FALSE)
dds_vst <- vst(dds, blind = FALSE)
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.

Log2 transformed counts just for comparison. It looks like this does a good enough job.

dds_log2 <- log2(counts(dds) + 1)
meanSdPlot(dds_log2)
## Warning: Computation failed in `stat_binhex()`:

Compare rlog and vst transformations

meanSdPlot(assay(dds_rlog))
## Warning: Computation failed in `stat_binhex()`:

meanSdPlot(assay(dds_vst))
## Warning: Computation failed in `stat_binhex()`:

Heatmap of the sample-to-sample distances

sampleDists <- dist(t(assay(dds_vst)))
sampleDistMatrix <- as.matrix(sampleDists)
rownames(sampleDistMatrix) <- paste(colnames(dds_vst))
colnames(sampleDistMatrix) <- paste(colnames(dds_vst))
colors <- magma(255)
pheatmap(sampleDistMatrix, clustering_distance_rows = sampleDists,
    clustering_distance_cols = sampleDists, col = colors)

Correlation between replicates

require(corrplot)
## Loading required package: corrplot
## corrplot 0.92 loaded
library("PerformanceAnalytics")
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## The following object is masked from 'package:S4Vectors':
## 
##     first
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:gplots':
## 
##     textplot
## The following object is masked from 'package:graphics':
## 
##     legend
res <- cor(assay(dds_vst))

corrplot(res, type = "upper", order = "hclust", tl.col = "black",
    tl.srt = 45)

chart.Correlation(assay(dds_vst), histogram = TRUE, pch = 19)

Review results in IGV

dir = "/Volumes/GoogleDrive/My Drive/Greally_Lab/Toxoplasma/Toxoplasma/ATAC-Seq/Differential Accessiblity/"
res.granges <- rowRanges(dds)
res.granges <- cbind(as.data.frame(res.granges@ranges), counts(dds,
    normalized = TRUE))  # add counts
res.granges <- res.granges[order(rowRanges(dds)$baseMean, decreasing = TRUE),
    ]
makebedtable(res.granges, "res.granges.html", dir)
## [1] "/Volumes/GoogleDrive/My Drive/Greally_Lab/Toxoplasma/Toxoplasma/ATAC-Seq/Differential Accessiblity//res.granges.html"

Annotate results

library(ChIPseeker)

# custom annoation
chrominfo <- read.table("/Volumes/GoogleDrive/My Drive/Greally_Lab/Toxoplasma/Genomes/ME49/GCF_000006565.2_TGA4_genomic.chrom_info.txt",
    header = TRUE)
TxDb.TGA4 <- makeTxDbFromGFF("/Volumes/GoogleDrive/My Drive/Greally_Lab/Toxoplasma/Genomes/ME49/GCF_000006565.2_TGA4_genomic.gff",
    format = "gff", organism = "Toxoplasma", taxonomyId = 1208370,
    dbxrefTag = "locus_tag", chrominfo = chrominfo)
## Import genomic features from the file as a GRanges object ... OK
## Prepare the 'metadata' data frame ... OK
## Make the TxDb object ...
## Warning in .extract_transcripts_from_GRanges(tx_IDX, gr, mcols0$type, mcols0$ID, : some transcripts have no "transcript_id" attribute ==> their name
##   ("tx_name" column in the TxDb object) was set to NA
## Warning in .extract_transcripts_from_GRanges(tx_IDX, gr, mcols0$type, mcols0$ID, : the transcript names ("tx_name" column in the TxDb object) imported
##   from the "transcript_id" attribute are not unique
## OK
customAnnotation <- list(version = "custom", TGA4 = transcripts(TxDb.TGA4))

# annotate
res.granges.annot <- rowRanges(dds)
res.granges.annot@elementMetadata <- cbind(res.granges.annot@elementMetadata,
    assay(dds_vst))
res.granges.annot <- annotatePeak(res.granges.annot, TxDb = TxDb.TGA4,
    level = "gene")
## >> preparing features information...      2022-04-20 13:23:18 
## >> identifying nearest features...        2022-04-20 13:23:18 
## >> calculating distance from peak to TSS...   2022-04-20 13:23:18 
## >> assigning genomic annotation...        2022-04-20 13:23:18 
## >> assigning chromosome lengths           2022-04-20 13:23:20 
## >> done...                    2022-04-20 13:23:20
# plot
plotAnnoPie(res.granges.annot)

plotDistToTSS(res.granges.annot, title = "Distribution of transcription factor-binding loci\nrelative to TSS")

res.granges.annot <- as.GRanges(res.granges.annot)
res.granges.annot <- res.granges.annot[abs(res.granges.annot$distanceToTSS) <
    5000]

# write the results
write.csv(res.granges.annot, paste(dir, "Infected_DESeq2_annotated_chipseeker_ATAC.csv",
    sep = ""))
output <- res.granges.annot[, 22:32]
names(output) <- NULL
makebedtable(output, "res.granges.annotate.chipseeker.html",
    dir)
## [1] "/Volumes/GoogleDrive/My Drive/Greally_Lab/Toxoplasma/Toxoplasma/ATAC-Seq/Differential Accessiblity//res.granges.annotate.chipseeker.html"

Explore Results

Combine annotated and non-annotated

res.granges.merge <- merge(rowRanges(dds), res.granges.annot)

# add meta data
tga4_map <- read.table("/Volumes/GoogleDrive/My Drive/Greally_Lab/Toxoplasma/Genomes/ME49/ID-description_mapping.txt",
    sep = "\t")
mcols(res.granges.merge)$description <- tga4_map[match(res.granges.merge$geneId,
    tga4_map$V1), ]$V2

Plot peaks over chromosomes

chrs <- c("NC_031467.1", "NC_031468.1", "NC_031469.1", "NC_031470.1",
    "NC_031471.1", "NC_031472.1", "NC_031473.1", "NC_031474.1",
    "NC_031475.1", "NC_031476.1", "NC_031477.1", "NC_031478.1",
    "NC_031479.1", "NC_031480.1")
map <- read_excel("/Volumes/GoogleDrive/My Drive/Greally_Lab/Toxoplasma/Genomes/ME49/NCBI_chromosome_mapping.xlsx")
dds.granges.plot <- res.granges.merge
dds.granges.plot <- keepSeqlevels(dds.granges.plot, pruning.mode = "tidy",
    chrs)
seqlevels(dds.granges.plot) <- map$`Molecule name`[-c(15:16)]  # leave out unmappedContig and apicoplast

# only show top 10 labels
dds.granges.plot <- dds.granges.plot[order(dds.granges.plot$baseMean,
    decreasing = TRUE), ]
labels <- dds.granges.plot$description
dds.granges.plot$description <- ""
dds.granges.plot$description[1:10] <- labels[1:10]

plotGrandLinear(dds.granges.plot, aes(y = log2(baseMean)), color = c("red",
    "blue"), ylab = "Log2(mean accessibility)", spaceline = TRUE,
    xlim = c(0, 7e+07)) + theme(axis.text.x = element_text(angle = 45,
    hjust = 1), text = element_text(size = 15, family = "Arial")) +
    geom_text_repel(aes(label = dds.granges.plot$description),
        color = "black", max.overlaps = Inf, size = 5)
## using coord:genome to parse x scale
## Warning: Removed 2 rows containing missing values (geom_text_repel).

autoplot(dds.granges.plot, layout = "karyogram", aes(color = log2(baseMean),
    fill = log2(baseMean))) + scale_color_gradient(low = "blue",
    high = "red") + scale_fill_gradient(low = "blue", high = "red")
## Warning in getIdeoGR(data): geom(ideogram) need valid seqlengths information for accurate mapping,
##                  now use reduced information as ideogram...
## Warning in getIdeoGR(data): geom(ideogram) need valid seqlengths information for accurate mapping,
##                  now use reduced information as ideogram...
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.

Heatmap

dds_vst.mat <- as.matrix(res.granges.annot@elementMetadata[,
    c("I1", "I2", "I3")])

# divide into 3 clusters
clust <- kmeans(dds_vst.mat, 3)

split <- factor(paste0("Cluster\n", clust$cluster), levels = c("Cluster\n1",
    "Cluster\n2", "Cluster\n3"))

HM <- Heatmap(dds_vst.mat, name = "Variance-stabilized Accessibility",
    row_names_gp = gpar(fontsize = 7), split = split, cluster_row_slices = TRUE,
    border = TRUE, cluster_rows = TRUE, cluster_columns = FALSE,
    show_row_names = FALSE, column_names_rot = 0, show_parent_dend_line = FALSE,
    row_dend_width = unit(20, "mm"), heatmap_legend_param = list(direction = "horizontal"))

draw(HM, heatmap_legend_side = "bottom")

Gene ontology analysis

Setting up

high <- res.granges.annot$geneId[clust$cluster == 1]
high <- high[-c(1:7)]  # remove apicoplast
medium <- res.granges.annot$geneId[clust$cluster == 2]
low <- res.granges.annot$geneId[clust$cluster == 3]

## build GO database read GO annotation file
GAF <- readGAF(filename = "/Volumes/GoogleDrive/My Drive/Greally_Lab/Toxoplasma/Genomes/ME49/ToxoDB-53_TgondiiME49_GO.gaf")
## Loading required package: GO.db
## Loading required package: RSQLite
## Loading required package: DBI
## Warning: RSQLite: Passing numeric values to row.names is deprecated. Pass a
## logical or a column name.
## Warning in result_fetch(res@ptr, n = n): SQL statements must be issued with
## dbExecute() or dbSendStatement() instead of dbGetQuery() or dbSendQuery().

## Warning in result_fetch(res@ptr, n = n): SQL statements must be issued with
## dbExecute() or dbSendStatement() instead of dbGetQuery() or dbSendQuery().
# extract relevant info.  unfortunately could not find
# accessor functions for all required info, thus sometimes
# had to utilize object slots directly (with @)
mapping.index <- GAF@itemName2ItemIndex
ID.annotations <- itemAnnotations(GAF)

GO.sets <- GAF@sets
GO.annotation <- setAnnotations(GAF)


# create a 2-column data frame with GOID and ID index after
# little further processing, this will be used as input for
# clusterProfiler
GO.df <- data.frame(GOID = rep(names(GO.sets), sapply(GO.sets,
    length)), ID.index = unlist(GO.sets), row.names = NULL)


# do some processing for objects GO.annotation and GO.df in
# both remove category 'all', and to GO.df also add column
# with Uniprot ids

# GO.annotation
GO.annotation <- GO.annotation[GO.annotation[, "term"] != "all",
    ]
GO.annotation[, "GOID"] <- rownames(GO.annotation)

# GO.df
GO.df <- GO.df[GO.df[, "GOID"] != "all", ]
GO.df[, "GeneID"] <- names(mapping.index[GO.df[, "ID.index"]])

Compare clusters

cluster.list <- list(High = high, Medium = medium, Low = low)
ck <- compareCluster(geneCluster = cluster.list, fun = "enricher",
    TERM2GENE = GO.df[, c("GOID", "GeneID")], TERM2NAME = GO.annotation[,
        c("GOID", "term")])
dotplot(ck, showCategory = 20)

write.csv(ck@compareClusterResult, paste(dir, "GO_enrichment.csv",
    sep = ""))